## Warning: replacing previous import 'BiocGenerics::Position' by
## 'ggplot2::Position' when loading 'phyloseq'
##     phyloseq      ggplot2 RColorBrewer         plyr        dplyr 
##         TRUE         TRUE         TRUE         TRUE         TRUE 
##        tidyr        knitr     magrittr          ape        vegan 
##         TRUE         TRUE         TRUE         TRUE         TRUE 
##       ggtree      cowplot      ggrepel       gtable    gridExtra 
##         TRUE         TRUE         TRUE         TRUE         TRUE

Read files for OTU analyses

#Import BIOM, sample data and tree files
OTU_file <- "../data/clean/Seqs_11_12.OTU.biom"
Metadata_file <- "../data/clean/env_metadata.csv"

Oom_OTU <- import_biom(OTU_file)

#Oom_tree <- read_tree(Tree_file)
Oom_data <- import_qiime_sample_data(Metadata_file)

#Covert year to factor
Oom_data$Year <- as.factor(Oom_data$Year)

#Rename tax ranks to actual names
colnames(tax_table(Oom_OTU)) <- c("Phylum","Class","Order",
                                  "Family","Genus","Clade","Species")

#Merge phyloseq objects
Oom_biom <- merge_phyloseq(Oom_OTU, Oom_data) 

Read files for phylotype analyses

#Import files
otu_phylo <- read.csv("../data/clean/OTU.phylotype.txt", sep = "\t", 
                      row.names = 1, header = TRUE)
taxa_phylo <- read.csv("../data/clean/Taxa.phylotype.txt", sep = "\t", 
                       row.names = 1, header = TRUE)

#Transform to phyloseq objects
otu_phylo <- otu_table(otu_phylo, taxa_are_rows = TRUE)
taxa_phylo <- tax_table(as.matrix(taxa_phylo))

#Phyloseq object
Oom_phylo <- phyloseq(otu_phylo, taxa_phylo)
Oom_phylo <- merge_phyloseq(Oom_phylo, Oom_data)

Richness analyses

#Plotting richness
plot_richness(Oom_biom, "St_Yr", 
              measures = c("InvSimpson","Shannon","Chao1"), color = "Year")
## Warning: Removed 250 rows containing missing values (geom_errorbar).

#Table richness
Tb_richness <- estimate_richness(Oom_biom, 
                                 split = TRUE, 
                                 c("Observed", "Shannon", "Simpson")) %>% 
  add_rownames(var = "sample") %>%
  mutate(Evenness = Shannon/log(Observed))

samp_size <- colSums(otu_table(Oom_biom))
samp_size <- data.frame(samp_size) %>% add_rownames(var = "sample")

smp_state <- data.frame(sample_data(Oom_biom)[,1:5]) %>% 
  add_rownames(var = "sample")

Tb_richness_final <- left_join(Tb_richness, samp_size, by = "sample") %>%
  left_join(smp_state, by = "sample") %>%
  filter(Observed > 1) %>%
  ddply(c("St_Yr"), summarise, 
        N = length(sample),
        Isolates = sum(samp_size),
        mean.Observed = mean(Observed),
        sd.Observed = sd(Observed, na.rm = TRUE),
        mean.Shannon = mean(Shannon),
        sd.Shannon = sd(Shannon, na.rm = TRUE),
        mean.Simpson = mean(Simpson),
        sd.Simpson = sd(Simpson, na.rm = TRUE),
        mean.Evenness = mean(Shannon/log(Observed)),
        sd.Evenness = sd(Shannon/log(Observed), na.rm = TRUE))

kable(Tb_richness_final, digits = 3)
St_Yr N Isolates mean.Observed sd.Observed mean.Shannon sd.Shannon mean.Simpson sd.Simpson mean.Evenness sd.Evenness
Arkansas 2011 1 320 14.000 NA 1.191 NA 0.597 NA 0.451 NA
Arkansas 2012 5 74 8.600 3.050 1.886 0.247 0.809 0.027 0.908 0.064
Illinois 2011 6 243 9.000 3.225 1.663 0.403 0.730 0.119 0.771 0.136
Illinois 2012 6 147 7.167 1.472 1.621 0.188 0.745 0.094 0.838 0.117
Indiana 2011 5 398 10.200 1.789 1.562 0.136 0.688 0.059 0.680 0.087
Indiana 2012 4 30 4.750 1.893 1.349 0.464 0.688 0.137 0.927 0.070
Iowa 2011 9 398 6.889 3.257 1.092 0.627 0.482 0.269 0.572 0.259
Iowa 2012 4 19 2.750 0.957 0.828 0.209 0.514 0.105 0.889 0.171
Kansas 2011 7 213 7.429 2.760 1.363 0.550 0.615 0.261 0.701 0.283
Kansas 2012 6 93 6.667 2.658 1.591 0.513 0.729 0.137 0.870 0.114
Michigan 2011 9 188 5.889 2.804 1.404 0.437 0.695 0.117 0.884 0.096
Michigan 2012 7 134 8.286 3.729 1.752 0.486 0.761 0.125 0.866 0.113
Minnesota 2011 6 185 10.667 6.186 1.859 0.537 0.774 0.104 0.827 0.102
Minnesota 2012 6 130 8.167 4.070 1.762 0.487 0.776 0.113 0.888 0.033
N Dakota 2011 9 210 9.556 5.637 1.784 0.635 0.758 0.162 0.874 0.168
N Dakota 2012 6 162 10.667 2.875 1.916 0.281 0.793 0.065 0.823 0.094
Nebraska 2011 4 75 7.750 3.686 1.654 0.457 0.750 0.102 0.865 0.076
Nebraska 2012 3 49 6.667 4.041 1.638 0.612 0.764 0.131 0.930 0.027
Ontario 2012 1 64 9.000 NA 0.854 NA 0.334 NA 0.389 NA
S Dakota 2011 5 23 2.800 1.304 0.901 0.359 0.559 0.116 0.953 0.078
S Dakota 2012 5 114 13.000 3.808 2.385 0.295 0.889 0.038 0.942 0.035
Wisconsin 2011 6 51 4.667 1.966 1.241 0.532 0.619 0.205 0.846 0.174
#Summarizing by state
Oom_phylo_state <- tax_glom(Oom_phylo, taxrank = "Clade")
Oom_phylo_state <- merge_samples(Oom_phylo_state, "St_Yr")

#Transform counts for plot
#Oom_phylo_state <- transform_sample_counts(Oom_phylo_state, 
#                                           function(x) 100 * x/sum(x))
State_Year <- psmelt(Oom_phylo_state)

#Color scale
pal <- colorRampPalette(brewer.pal(12, "Paired"))

#Reorder factor
Clade_factor <- State_Year %>% group_by(Clade) %>% dplyr::summarise(sum(Abundance))
Clade_factor <- Clade_factor[order(-Clade_factor$`sum(Abundance)`),]
Clade_factor <- Clade_factor$Clade
State_Year$Clade <- factor(State_Year$Clade, levels = Clade_factor)
levels(State_Year$Clade)
##  [1] "Pythium_Clade_F"      "Pythium_Clade_B"      "Pythium_Clade_I"     
##  [4] "Pythium_Clade_J"      "Pythium_Clade_E"      "Phytophthora_Clade_7"
##  [7] "Pythium_sp."          "Pythium_Clade_D"      "Phytopythium"        
## [10] "Phytophthora_Clade_8" "Phytophthora_Clade_6" "Pythium_Clade_A"     
## [13] "Pythium_Clade_G"      "Aphanomyces"          "Phytophthora_sp."    
## [16] "Pythiogeton"
data_state <- dplyr::select(State_Year, Sample, Abundance, Clade)
data_state <- data_state[with(data_state, order(Clade, as.numeric(Clade))),]

#Plot
(ByState <- ggplot(data = data_state, aes(Sample, Abundance, fill = Clade)) +
  geom_bar(stat = "identity", position = position_fill()) + coord_flip() +
  scale_fill_manual(values = pal(18)) + 
   theme(text = element_text(size = 15)) + theme_gray())

Rarefaction curves

## DATA ##

psdata <- merge_samples(Oom_phylo, "St_Yr")
psdata
## phyloseq-class experiment-level object
## otu_table()   OTU Table:         [ 83 taxa and 22 samples ]
## sample_data() Sample Data:       [ 22 samples by 46 sample variables ]
## tax_table()   Taxonomy Table:    [ 83 taxa by 7 taxonomic ranks ]
sample_sums(psdata)
##  Arkansas 2011  Arkansas 2012  Illinois 2011  Illinois 2012   Indiana 2011 
##            320             76            244            149            400 
##   Indiana 2012      Iowa 2011      Iowa 2012    Kansas 2011    Kansas 2012 
##             33            397             24            216             94 
##  Michigan 2011  Michigan 2012 Minnesota 2011 Minnesota 2012  N Dakota 2011 
##            192            137            187            131            167 
##  N Dakota 2012  Nebraska 2011  Nebraska 2012   Ontario 2012  S Dakota 2011 
##            163             75             50             64             24 
##  S Dakota 2012 Wisconsin 2011 
##            117            113
### Calculate alpha diversity ###
set.seed(42)

calculate_rarefaction_curves <- function(psdata, measures, depths) {
  require('plyr') # ldply
  require('reshape2') # melt

  estimate_rarified_richness <- function(psdata, measures, depth) {
    if(max(sample_sums(psdata)) < depth) return()
    psdata <- prune_samples(sample_sums(psdata) >= depth, psdata)

    rarified_psdata <- rarefy_even_depth(psdata, depth, verbose = FALSE)

    alpha_diversity <- estimate_richness(rarified_psdata, measures = measures)

    # as.matrix forces the use of melt.array, which includes the Sample names (rownames)
    molten_alpha_diversity <- melt(as.matrix(alpha_diversity), varnames = c('Sample', 'Measure'), value.name = 'Alpha_diversity')

    molten_alpha_diversity
  }

  names(depths) <- depths # this enables automatic addition of the Depth to the output by ldply
  rarefaction_curve_data <- ldply(depths, estimate_rarified_richness, psdata = psdata, measures = measures, .id = 'Depth', .progress = ifelse(interactive(), 'text', 'none'))

  # convert Depth from factor to numeric
  rarefaction_curve_data$Depth <- as.numeric(levels(rarefaction_curve_data$Depth))[rarefaction_curve_data$Depth]

  rarefaction_curve_data
}

rarefaction_curve_data <- calculate_rarefaction_curves(psdata, c('Observed', 'Shannon'), rep(c(1, 5, 10, 1:20 * 10), each = 10))
## Loading required package: reshape2
summary(rarefaction_curve_data)
##      Depth                  Sample         Measure     Alpha_diversity 
##  Min.   :  1.00   Arkansas.2011: 460   Observed:3380   Min.   : 0.000  
##  1st Qu.: 10.00   Illinois.2011: 460   Shannon :3380   1st Qu.: 1.932  
##  Median : 60.00   Indiana.2011 : 460                   Median : 2.589  
##  Mean   : 67.64   Iowa.2011    : 460                   Mean   : 7.106  
##  3rd Qu.:110.00   Kansas.2011  : 460                   3rd Qu.:13.000  
##  Max.   :200.00   Michigan.2011: 440                   Max.   :27.000  
##                   (Other)      :4020
### Summarize alpha diversity ###
rarefaction_curve_data_summary <- ddply(rarefaction_curve_data, c('Depth', 'Sample', 'Measure'), summarise, Alpha_diversity_mean = mean(Alpha_diversity), Alpha_diversity_sd = sd(Alpha_diversity))

### Add sample data ###
smp_dt <- as.data.frame(sample_data(psdata))
row.names(smp_dt) <- gsub(pattern = " ",replacement = ".", x = row.names(smp_dt))
rarefaction_curve_data_summary_verbose <- merge(rarefaction_curve_data_summary, smp_dt,
                                                by.x = 'Sample', by.y = 'row.names')

### plot ###
ggplot(
  data = rarefaction_curve_data_summary_verbose,
  mapping = aes(
    x = Depth,
    y = Alpha_diversity_mean,
    ymin = Alpha_diversity_mean - Alpha_diversity_sd,
    ymax = Alpha_diversity_mean + Alpha_diversity_sd,
    colour = State2,
    group = Sample)) + 
  geom_smooth() + 
  #geom_pointrange() + 
  facet_wrap(facets = ~ Measure, scales = 'free_y') + 
  theme_gray()

Latitude/Longitude gradient

#Richness data
richness2 <- estimate_richness(Oom_biom, 
                                 split = TRUE, 
                                 c("Observed", "Shannon", "Simpson", "Chao1", "InvSimpson")) %>% 
  add_rownames(var = "sample")


samp_size <- colSums(otu_table(Oom_biom))
samp_size <- data.frame(samp_size) %>% add_rownames(var = "sample")

#Sample data
smp_state2 <- data.frame(sample_data(Oom_biom)[,c(1:5,8,9)]) %>% 
  add_rownames(var = "sample")
smp_state2 <- left_join(smp_state2, samp_size, by = "sample")

#Merging tables together
lt_rch_data <- left_join(smp_state2, richness2, by = "sample")
lt_rch_data <- lt_rch_data[!is.na(lt_rch_data[,7]),]
lt_rch_data <- lt_rch_data[(lt_rch_data[,9]) >= 10,]

#Correlation
cor.test(lt_rch_data$Lat, lt_rch_data$Observed, method = "spearman")
## Warning in cor.test.default(lt_rch_data$Lat, lt_rch_data$Observed, method =
## "spearman"): Cannot compute exact p-value with ties
## 
##  Spearman's rank correlation rho
## 
## data:  lt_rch_data$Lat and lt_rch_data$Observed
## S = 86029, p-value = 0.0824
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##       rho 
## 0.1883719
cor.test(lt_rch_data$Lat, lt_rch_data$Simpson, method = "spearman")
## Warning in cor.test.default(lt_rch_data$Lat, lt_rch_data$Simpson, method =
## "spearman"): Cannot compute exact p-value with ties
## 
##  Spearman's rank correlation rho
## 
## data:  lt_rch_data$Lat and lt_rch_data$Simpson
## S = 80545, p-value = 0.02596
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##       rho 
## 0.2401049
cor.test(lt_rch_data$Lat, lt_rch_data$Shannon, method = "spearman")
## Warning in cor.test.default(lt_rch_data$Lat, lt_rch_data$Shannon, method =
## "spearman"): Cannot compute exact p-value with ties
## 
##  Spearman's rank correlation rho
## 
## data:  lt_rch_data$Lat and lt_rch_data$Shannon
## S = 78536, p-value = 0.01602
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##       rho 
## 0.2590589
cor.test(lt_rch_data$Long, lt_rch_data$Shannon, method = "spearman")
## Warning in cor.test.default(lt_rch_data$Long, lt_rch_data$Shannon, method =
## "spearman"): Cannot compute exact p-value with ties
## 
##  Spearman's rank correlation rho
## 
## data:  lt_rch_data$Long and lt_rch_data$Shannon
## S = 129750, p-value = 0.03803
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##        rho 
## -0.2241301
#Plot
(Obs_plot <- ggplot(lt_rch_data, aes(x = Lat, y = Observed)) + 
  geom_point(size = 2, alpha = 0.5) + geom_smooth(method = lm) + theme_gray() +
  labs(x = "Latitude", y = "Observed OTUs") +
  annotate("text", x=45, y=0.4, label = "p-value = 0.0813\nrho = 0.189", 
           fontface = "bold", hjust = 0))

(Obs_plot2 <- ggplot(lt_rch_data, aes(x = Lat, y = Shannon)) + 
  geom_point(size = 2, alpha = 0.5) + geom_smooth(method = lm) + theme_gray() +
  labs(x = "Latitude", y = "Shannon diversity index") +
  annotate("text", x=45, y=0.4, label = "p-value = 0.016\nrho = 0.258", 
           fontface = "bold", hjust = 0))

(Obs_plot3 <- ggplot(lt_rch_data, aes(x = Long, y = Shannon)) + 
  geom_point(size = 2, alpha = 0.5) + geom_smooth(method = lm) + theme_gray() +
  labs(x = "Longitude", y = "Shannon diversity index") +
  annotate("text", x=-88, y=0.4, label = "p-value = 0.037\nrho = -0.224", 
           fontface = "bold", hjust = 0))

#grid.arrange(Obs_plot2, Obs_plot3)

Cluster analysis - OTU

#Phyloseq otu table to vegan otu table
Oom_st <- prune_samples(!(grepl('MICO|ONSO', sample_names(Oom_biom))), Oom_biom)
Oom_st <- merge_samples(Oom_biom, "St_Yr")
tb_otu <- veganotu(Oom_st)
tb_otu <- tb_otu[row.names(tb_otu) != "Ontario 2012",]

#Phyloseq sample to vegan sample table
tb_sample <- vegansam(Oom_st)

#Relative abundance and bray-curtis distance
tb_otu <- decostand(tb_otu, method = "total")
tb_otu.bc <- vegdist(tb_otu, method = "bray")
tb_otu.pa.bc <- vegdist(tb_otu, method = "bray", binary = TRUE)

tb.otu.cl <- as.phylo(hclust(tb_otu.bc, method = "ward.D2"))
tb.otu.pa <- as.phylo(hclust(tb_otu.pa.bc, method = "ward.D2"))

cls <- list(c1 = c("N Dakota 2011", "N Dakota 2012", 
                   "Minnesota 2011", "Minnesota 2012"),
            c2 = c("S Dakota 2011", "S Dakota 2012", "Iowa 2011", 
                   "Iowa 2012", "Nebraska 2011"),
            c3 = c("Wisconsin 2011", "Illinois 2011", "Illinois 2012", 
                   "Indiana 2011", "Indiana 2012", "Michigan 2011", 
                   "Michigan 2012"),
            c4 = c("Kansas 2011", "Kansas 2012", "Arkansas 2011", 
                   "Arkansas 2012"))

tb.otu.cl <- groupOTU(tb.otu.cl, cls)
tb.otu.pa <- groupOTU(tb.otu.pa, cls)

t1.otu <- ggtree(tb.otu.cl, branch.length = 'branch.length') + 
  #geom_text(aes(label = label, hjust = -0.05)) + 
  ggplot2::xlim(0,1) + geom_tiplab(aes(color = group, label = label)) +
  scale_color_brewer(type = "div", palette = "Set1")
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
ggtree(tb.otu.pa) + geom_text(aes(label = label, hjust = -0.05)) + ggplot2::xlim(0,0.6)
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Warning: Removed 20 rows containing missing values (geom_text).

t2.otu <- ggtree(tb.otu.pa, branch.length = 'branch.length') + 
  #geom_text(aes(label = label, hjust = -0.05)) + 
  ggplot2::xlim(0,0.6) + geom_tiplab(aes(color = group, label = label)) +
  scale_color_brewer(type = "div", palette = "Set1")
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
gridExtra::grid.arrange(t1.otu, t2.otu, ncol = 2)

Cluster analysis - phylotype

#Phyloseq otu table to vegan otu table
Oom_ph.1 <- prune_samples(!(grepl('MICO|ONSO', sample_names(Oom_phylo))), Oom_phylo)
Oom_ph <- merge_samples(Oom_ph.1, "St_Yr")
tb_ph <- veganotu(Oom_ph)
tb_ph <- tb_ph[row.names(tb_ph) != "Ontario 2012",]

#Phyloseq sample to vegan sample table
tb_sp_ph <- vegansam(Oom_ph)

#Relative abundance and bray-curtis distance
tb_ph <- decostand(tb_ph, method = "total")
tb_ph.bc <- vegdist(tb_ph, method = "bray")
tb_ph.pa.bc <- vegdist(tb_ph, method = "jaccard", binary = TRUE)

tb.ph.cl <- as.phylo(hclust(tb_ph.bc))
tb.ph.pa <- as.phylo(hclust(tb_ph.pa.bc))

tb.ph.cl <- groupOTU(tb.ph.cl, cls)
tb.ph.pa <- groupOTU(tb.ph.pa, cls)


ggtree(tb.ph.cl, layout="rectangular") + 
  geom_text(aes(label = label, hjust = -0.05)) + ggplot2::xlim(0,0.6) + theme_tree2()
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Warning: Removed 20 rows containing missing values (geom_text).

t1.ph <- ggtree(tb.ph.cl, branch.length = 'branch.length') + 
  #geom_text(aes(label = label, hjust = -0.05)) + 
  ggplot2::xlim(0,0.6) + geom_tiplab(aes(label = label)) +
  scale_color_brewer(type = "div", palette = "Set1") + xlab("Height") + ylab("Cluster dendogram")
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
ggtree(tb.ph.pa) + 
  geom_text(aes(label = label, hjust = -0.05)) + ggplot2::xlim(0,0.6)
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Warning: Removed 20 rows containing missing values (geom_text).

t2.ph <- ggtree(tb.ph.pa, branch.length = 'branch.length') + 
  #geom_text(aes(label = label, hjust = -0.05)) + 
  ggplot2::xlim(0,0.6) + geom_tiplab(aes(color = group, label = label)) +
  scale_color_brewer(type = "div", palette = "Set1")
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
#gridExtra::grid.arrange(t1.ph, t2.ph, ncol = 2)
#gridExtra::grid.arrange(t1.otu, t2.otu, t1.ph, t2.ph, ncol = 2)
plot_grid(t1.ph, Obs_plot2, labels = c("A", "B"), ncol = 1,
          align = "h"
          #rel_widths = c(0.85,1,1), rel_heights = c(0.5,1,1) 
          )

(plot_grid(t1.ph, Obs_plot2, Obs_plot3, labels = c("A", "B", "C"), ncol = 1,
          align = "h"
          #rel_widths = c(0.85,1,1), rel_heights = c(0.5,1,1) 
          ))

Analysis of similarity (ANOSIM) for different parameters

Oom_grp <- get_variable(Oom_biom, "State2")
Oom_st_ano <- anosim(distance(Oom_biom, "bray"), Oom_grp)

Oom_biom_lt <- prune_samples(!is.na(Oom_biom@sam_data$Lat), Oom_biom)

Lat_grp <- cut(get_variable(Oom_biom_lt, "Lat"), c(32,42,50))
(Oom_lt_ano <- anosim(distance(Oom_biom_lt, "bray"), Lat_grp))
## 
## Call:
## anosim(dat = distance(Oom_biom_lt, "bray"), grouping = Lat_grp) 
## Dissimilarity: bray 
## 
## ANOSIM statistic R: 0.1033 
##       Significance: 0.001 
## 
## Permutation: free
## Number of permutations: 999
Long_grp <- cut(get_variable(Oom_biom_lt, "Long"), c(-80,-95,-110))
(Oom_lg_ano <- anosim(distance(Oom_biom_lt, "bray"), Long_grp))
## 
## Call:
## anosim(dat = distance(Oom_biom_lt, "bray"), grouping = Long_grp) 
## Dissimilarity: bray 
## 
## ANOSIM statistic R: 0.04035 
##       Significance: 0.094 
## 
## Permutation: free
## Number of permutations: 999
Yr_grp <- get_variable(Oom_biom, "Year")
Oom_yr_ano <- anosim(distance(Oom_biom, "bray"), Yr_grp)

StYr_grp <- get_variable(Oom_biom, "St_Yr")
Oom_styr_ano <- anosim(distance(Oom_biom, "bray"), StYr_grp)

ADONIS for different parameters

df <- as(sample_data(Oom_biom), "data.frame")
d <- distance(Oom_biom, "bray")

Oom_adonis <- adonis(d ~ State2 + Year + State2*Year, df)

kable(Oom_adonis$aov.tab, digits = 3, 
      caption = "__Table 2.__ Comparison of community structure (beta diversity)\
      using Bray-curtis distance by State and year.")
Table 2. Comparison of community structure (beta diversity) using Bray-curtis distance by State and year.
Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
State2 11 9.576 0.871 2.908 0.207 0.001
Year 1 0.702 0.702 2.346 0.015 0.002
State2:Year 9 5.259 0.584 1.952 0.113 0.001
Residuals 103 30.835 0.299 NA 0.665 NA
Total 124 46.371 NA NA 1.000 NA

Ordination analysis

Oom_biom2 <- prune_samples(!is.na(Oom_biom@sam_data$Lat), Oom_biom)

colors2 <- c("#77C7C6",
"#C37F3B",
"#869BCF",
"#7DD54E",
"#C67BB7",
"#CDC84A",
"#CC6569",
"#83D693",
"#7A7678",
"#698547",
"#D3C1A7")

Oom_biom_ord <- ordinate(Oom_biom2, "PCoA", "bray")
ord_plot <- plot_ordination(Oom_biom2, Oom_biom_ord, color = "State2", shape = "Year")
(ord_plot.f <- ord_plot + geom_point(size = 4, alpha = 0.7) + 
  scale_colour_manual(values = colors2) +
  theme_light() + labs(color = "State", shape = "Year"))

Environmental/Edaphic factor analysis

## Environment fit analysis
bray.pcoa <- ordinate(Oom_biom2, method = "PCoA", "bray")
env <- as.data.frame(Oom_biom2@sam_data)

Oom_env <- envfit(bray.pcoa$vectors, env, permutations = 999)
## Warning in as.matrix.data.frame(P): Setting class(x) to NULL; result will
## no longer be an S4 object
fit_data <- as.data.frame(scores(Oom_env, display = "vectors")) %>%
  add_rownames(var = "Env.var") %>%
  bind_cols(data.frame(Oom_env$vectors$r, Oom_env$vectors$pvals)) %>%
  rename(R2 = Oom_env.vectors.r, P.value = Oom_env.vectors.pvals) %>%
  arrange(P.value)
  
## Supplementary material version

kable(fit_data, digits = 3, caption = "__Supp. table 1.__ Significance and correlation\
of vectors fitted into PCoA ordination of oomycete communities associated with\
soybean seedlings")
Supp. table 1. Significance and correlation of vectors fitted into PCoA ordination of oomycete communities associated with soybean seedlings
Env.var Axis.1 Axis.2 R2 P.value
Long -0.025 0.401 0.161 0.001
Precip -0.279 0.275 0.154 0.001
Precip_AMJ_12 -0.101 -0.349 0.132 0.001
Precip_AMJ_11 -0.303 0.278 0.169 0.001
Precip_AMJ -0.405 -0.024 0.165 0.001
Precip_30yr_avg -0.279 0.189 0.113 0.002
Tmin_AMJ_11 -0.290 0.109 0.096 0.002
Db3rdbar 0.016 0.307 0.095 0.004
Tmean_AMJ_12 -0.295 -0.057 0.090 0.004
Lat 0.308 -0.053 0.097 0.005
Tmean_AMJ_11 -0.296 0.061 0.091 0.005
TMin_30yr_avg -0.284 0.102 0.091 0.006
Tmax_AMJ_12 -0.296 -0.047 0.090 0.006
Tmin_AMJ_12 -0.283 -0.066 0.084 0.007
TMean_30yr_avg -0.286 0.069 0.087 0.008
Tmax_AMJ_11 -0.289 0.019 0.084 0.008
TMax_30yr_avg -0.284 0.037 0.082 0.012
Clay -0.066 -0.259 0.072 0.019
CEC7 -0.116 -0.234 0.068 0.020
pHwater 0.251 -0.084 0.070 0.020
TMin_yr -0.198 0.160 0.065 0.023
WC3rdbar -0.131 -0.222 0.066 0.028
Tmin_AMJ -0.241 0.029 0.059 0.030
EC 0.196 -0.119 0.053 0.046
Sand 0.153 0.167 0.051 0.048
TMean_yr -0.188 0.093 0.044 0.084
OrgMatter 0.181 -0.077 0.038 0.096
Silt -0.190 -0.031 0.037 0.120
Tmean_AMJ -0.188 0.013 0.036 0.133
Tmax_yr -0.171 0.028 0.030 0.173
ECEC -0.088 0.138 0.027 0.226
Tmax_AMJ -0.140 0.001 0.020 0.336
Slope_Avg -0.053 0.096 0.012 0.520
Shape_Area -0.103 -0.011 0.011 0.562
Shape_Length -0.079 -0.051 0.009 0.602
AWC -0.034 -0.077 0.007 0.659
Aspect 0.003 0.055 0.003 0.850
## Reduced version

#kable(fit_data[fit_data$P.value < 0.05,], digits = 3, caption = "__Table 3.__ Significance and correlation\
#of vectors fitted into PCoA ordination of oomycete communities associated with\
#soybean seedlings")

Results ordination and environmental data

## Vectors for plot
fit_reduced <- fit_data[fit_data$P.value < 0.05,] 

fit_plot <- as.data.frame(scores(Oom_env, display = "vectors")) %>%
  add_rownames(var = "Env.var") %>%
  inner_join(fit_reduced, by = "Env.var") %>%
  arrange(P.value) %>%
  slice(c(10,1,5,19,18,8,20,22,2,17,15,12,21,6,23))

fit_plot$Env.var2 <- c("Latitude", "Longitude", "Precip. Season","CEC", "Clay (%)", 
                       "Bulk density", "Soil pH", "Water content", 
                       "Annual Total Precip.", "Max. Temp 30yr", 
                       "Mean Temp 30yr", "Min. Temp 30yr",
                       "Annual Min. Temp","Precip. 30yr", "Min. Temp Season")

## paper version
kable(fit_plot, digits = 3, caption = "__Table 3.__ Significant factors using\
      ‘envfit’ function from vegan that affect oomycete community associated\
      with soybean seedlings.")
Table 3. Significant factors using ‘envfit’ function from vegan that affect oomycete community associated with soybean seedlings.
Env.var Axis.1.x Axis.2.x Axis.1.y Axis.2.y R2 P.value Env.var2
Lat 0.308 -0.053 0.308 -0.053 0.097 0.005 Latitude
Long -0.025 0.401 -0.025 0.401 0.161 0.001 Longitude
Precip_AMJ -0.405 -0.024 -0.405 -0.024 0.165 0.001 Precip. Season
CEC7 -0.116 -0.234 -0.116 -0.234 0.068 0.020 CEC
Clay -0.066 -0.259 -0.066 -0.259 0.072 0.019 Clay (%)
Db3rdbar 0.016 0.307 0.016 0.307 0.095 0.004 Bulk density
pHwater 0.251 -0.084 0.251 -0.084 0.070 0.020 Soil pH
WC3rdbar -0.131 -0.222 -0.131 -0.222 0.066 0.028 Water content
Precip -0.279 0.275 -0.279 0.275 0.154 0.001 Annual Total Precip.
TMax_30yr_avg -0.284 0.037 -0.284 0.037 0.082 0.012 Max. Temp 30yr
TMean_30yr_avg -0.286 0.069 -0.286 0.069 0.087 0.008 Mean Temp 30yr
TMin_30yr_avg -0.284 0.102 -0.284 0.102 0.091 0.006 Min. Temp 30yr
TMin_yr -0.198 0.160 -0.198 0.160 0.065 0.023 Annual Min. Temp
Precip_30yr_avg -0.279 0.189 -0.279 0.189 0.113 0.002 Precip. 30yr
Tmin_AMJ -0.241 0.029 -0.241 0.029 0.059 0.030 Min. Temp Season
ord_plot.data <- plot_ordination(Oom_biom2, Oom_biom_ord, 
                            color = "State2", shape = "Year", justDF = TRUE)

(ord.plot.env <- ggplot(data = ord_plot.data, aes(x = Axis.1, y = Axis.2)) + 
  geom_point(aes(color=State2, shape=Year), size = 4, alpha = 0.7) + 
  #scale_color_brewer(type = "div", palette ="Spectral") +
  labs(color = "State", shape = "Year", x = "PCoA 1 [12.1%]", y = "PCoA 2 [9.4%]") +
  scale_colour_manual(values = colors2) +
  geom_segment(data = fit_plot, aes(x = 0, xend = Axis.1.x, y = 0, yend = Axis.2.x), 
               arrow = arrow(length = unit(0.1,"cm")), color = "black", size = 0.8) + 
  geom_label_repel(data = fit_plot, aes(x = Axis.1.x, y = Axis.2.x, label = Env.var2), 
            size = 3, force = 1) + #facet_wrap(~Year) +
  theme_gray())

Correlation of environmental parameters with PCoA axes

#Season precipitation correlation with axis
P.cor <- cor.test(ord_plot.data[,"Axis.1"], log10(ord_plot.data[,"Precip_AMJ"]), method = "spearman")
## Warning in cor.test.default(ord_plot.data[, "Axis.1"],
## log10(ord_plot.data[, : Cannot compute exact p-value with ties
Tmin.cor <- cor.test(ord_plot.data[,"Axis.1"], ord_plot.data[,"Tmin_AMJ"], method = "spearman")
## Warning in cor.test.default(ord_plot.data[, "Axis.1"], ord_plot.data[,
## "Tmin_AMJ"], : Cannot compute exact p-value with ties
Clay.cor <- cor.test(ord_plot.data[,"Axis.2"], ord_plot.data[,"Clay"], method = "spearman")
## Warning in cor.test.default(ord_plot.data[, "Axis.2"], ord_plot.data[,
## "Clay"], : Cannot compute exact p-value with ties
Blk.cor <- cor.test(ord_plot.data[,"Axis.2"], ord_plot.data[,"Db3rdbar"], method = "spearman")
## Warning in cor.test.default(ord_plot.data[, "Axis.2"], ord_plot.data[,
## "Db3rdbar"], : Cannot compute exact p-value with ties
Lat.cor <- cor.test(ord_plot.data[,"Axis.1"], ord_plot.data[,"Lat"], method = "spearman")
## Warning in cor.test.default(ord_plot.data[, "Axis.1"], ord_plot.data[,
## "Lat"], : Cannot compute exact p-value with ties
Long.cor <- cor.test(ord_plot.data[,"Axis.2"], ord_plot.data[,"Long"], method = "spearman")
## Warning in cor.test.default(ord_plot.data[, "Axis.2"], ord_plot.data[,
## "Long"], : Cannot compute exact p-value with ties
#Plotting ordination 
Prec <- ggplot(ord_plot.data, aes(x = Axis.1, y = Precip_AMJ)) + 
  geom_smooth(method = lm, color = "gray14") +
  geom_point(aes(color = Lat), size = 3) + 
  theme_gray() +
  scale_color_gradientn(colours= c("#ef8a62","#67a9cf")) +
  labs(y = "Precipitation season (mm)", color = "Latitude") +
  annotate("text", x = 0.15, y = 210, 
           label = paste("p-value =", format(P.cor$p.value, digits = 3),
                         "\nrho =", round(P.cor$estimate, digits = 3)),
                         fontface = "bold", hjust = 0)


Tmin <- ggplot(ord_plot.data, aes(x = Axis.1, y = Tmin_AMJ)) + 
  geom_smooth(method = lm, color = "gray14") +
  geom_point(aes(color = Lat), size = 3) + 
  theme_gray() +
  scale_color_gradientn(colours= c("#ef8a62","#67a9cf")) +
  labs(y = "Minimum temperature season", color = "Latitude") +
  annotate("text", x = 0.15, y = 16, 
           label = paste("p-value =", format(Tmin.cor$p.value, digits = 3,
                                             scientific = TRUE),
                         "\nrho =", round(Tmin.cor$estimate, digits = 3)),
                         fontface = "bold", hjust = 0)

Clay <- ggplot(ord_plot.data, aes(x = Clay, y = Axis.2)) + 
  geom_smooth(method = lm, color = "gray14") +
  geom_point(aes(color = WC3rdbar), size = 3) + 
  theme_gray() +
  scale_color_gradientn(colours= c("#ef8a62","#67a9cf")) +
  labs(x = "Clay content (%)", color = "Vol. water\ncontent (%)") +
  annotate("text", x = 50, y = 0.38, 
           label = paste("p-value =", format(Clay.cor$p.value, digits = 3, 
                                             scientific = TRUE),
                         "\nrho =", round(Clay.cor$estimate, digits = 3)),
                         fontface = "bold", hjust = 0)

Blk <- ggplot(ord_plot.data, aes(x = Db3rdbar, y = Axis.2)) + 
  geom_smooth(method = lm, color = "gray14") +
  geom_point(aes(color = WC3rdbar), size = 3) + 
  theme_gray() +
  scale_color_gradientn(colours= c("#ef8a62","#67a9cf")) +
  labs(x = "Bulk density (g/cm3)", color = "Vol. water\ncontent (%)") +
  annotate("text", x = 1.58, y = 0.4, 
           label = paste("p-value =", format(Blk.cor$p.value, digits = 3,
                                             scientific = TRUE),
                         "\nrho =", round(Blk.cor$estimate, digits = 3)),
                         fontface = "bold", hjust = 0)

#Grid arregement for linear graphs
plot_grid(Prec, Tmin, Clay, Blk, labels = c("A", "B", "C", "D"), 
          ncol = 2, nrow = 2, align = "h")

Mantel test for parameters

Oom.dist <- distance(Oom_biom2, "bray")
env2 <- data.frame(env[,-c(1:7,12,24)])
env2.dist <- vegdist(env2$Lat, method = "euclidean")

# test.m <- mantel(Oom.dist, env2.dist, method = "pearson")
# test.m$statistic
# 
# length(colnames(env2))

factor_mantel <- function(dist, env){
  n <- length(colnames(env))
  df <- data.frame(Env.var = character(0), stat = numeric(0), pval = numeric(0))
  for (i in seq(1,n,1)) {
    factor_dist <- vegdist(env[,i], method = "euclidean")
    mt_test <- mantel(dist, factor_dist, method = "spearman")
    df <- rbind(df, data.frame(Env.var = colnames(env[i]), 
                               Statistic = mt_test$statistic, 
                               p.val = mt_test$signif))
    }
  df
}


mantel_table <- factor_mantel(Oom.dist, env2)
cor.table <- left_join(fit_data, mantel_table, by = "Env.var")
## Warning in left_join_impl(x, y, by$x, by$y): joining factor and character
## vector, coercing into character vector
kable(cor.table, digits = 3)
Env.var Axis.1 Axis.2 R2 P.value Statistic p.val
Long -0.025 0.401 0.161 0.001 0.083 0.003
Precip -0.279 0.275 0.154 0.001 0.117 0.003
Precip_AMJ_12 -0.101 -0.349 0.132 0.001 0.014 0.338
Precip_AMJ_11 -0.303 0.278 0.169 0.001 0.076 0.039
Precip_AMJ -0.405 -0.024 0.165 0.001 0.070 0.031
Precip_30yr_avg -0.279 0.189 0.113 0.002 0.103 0.004
Tmin_AMJ_11 -0.290 0.109 0.096 0.002 0.103 0.010
Db3rdbar 0.016 0.307 0.095 0.004 0.023 0.275
Tmean_AMJ_12 -0.295 -0.057 0.090 0.004 0.155 0.001
Lat 0.308 -0.053 0.097 0.005 0.115 0.008
Tmean_AMJ_11 -0.296 0.061 0.091 0.005 0.142 0.001
TMin_30yr_avg -0.284 0.102 0.091 0.006 0.085 0.030
Tmax_AMJ_12 -0.296 -0.047 0.090 0.006 0.158 0.001
Tmin_AMJ_12 -0.283 -0.066 0.084 0.007 0.144 0.001
TMean_30yr_avg -0.286 0.069 0.087 0.008 0.122 0.002
Tmax_AMJ_11 -0.289 0.019 0.084 0.008 0.173 0.001
TMax_30yr_avg -0.284 0.037 0.082 0.012 0.153 0.001
Clay -0.066 -0.259 0.072 0.019 0.151 0.003
CEC7 -0.116 -0.234 0.068 0.020 0.080 0.039
pHwater 0.251 -0.084 0.070 0.020 0.044 0.097
TMin_yr -0.198 0.160 0.065 0.023 0.068 0.054
WC3rdbar -0.131 -0.222 0.066 0.028 0.143 0.005
Tmin_AMJ -0.241 0.029 0.059 0.030 0.106 0.009
EC 0.196 -0.119 0.053 0.046 -0.011 0.584
Sand 0.153 0.167 0.051 0.048 0.059 0.091
TMean_yr -0.188 0.093 0.044 0.084 0.112 0.010
OrgMatter 0.181 -0.077 0.038 0.096 -0.017 0.641
Silt -0.190 -0.031 0.037 0.120 0.035 0.170
Tmean_AMJ -0.188 0.013 0.036 0.133 0.121 0.004
Tmax_yr -0.171 0.028 0.030 0.173 0.137 0.001
ECEC -0.088 0.138 0.027 0.226 0.011 0.430
Tmax_AMJ -0.140 0.001 0.020 0.336 0.119 0.003
Slope_Avg -0.053 0.096 0.012 0.520 -0.023 0.693
Shape_Area -0.103 -0.011 0.011 0.562 -0.008 0.566
Shape_Length -0.079 -0.051 0.009 0.602 -0.055 0.872
AWC -0.034 -0.077 0.007 0.659 0.020 0.306
Aspect 0.003 0.055 0.003 0.850 0.032 0.122

Abundance of top 8 pathogenic species

##Top 8 species
top.sp <- names(sort(taxa_sums(Oom_phylo), TRUE)[1:8])
oom.sp <- prune_taxa(top.sp, Oom_phylo)
oom.sp.st <- merge_samples(oom.sp, "State2")

##Function
getLabelPoint <- function(county) {Polygon(county[c('long', 'lat')])@labpt}

#map data
library(maps)
## 
##  # maps v3.1: updated 'world': all lakes moved to separate new #
##  # 'lakes' database. Type '?world' or 'news(package="maps")'.  #
## 
## Attaching package: 'maps'
## The following object is masked from 'package:plyr':
## 
##     ozone
library(sp)
states <- map_data("state")
centroids <- by(states, states$region, getLabelPoint)
centroids <- do.call("rbind.data.frame", centroids)
names(centroids) <- c('long', 'lat')

centroids[row.names(centroids) == "michigan",]$long <- -84.62014
centroids[row.names(centroids) == "michigan",]$lat <- 43.49422

#Transform counts for plot
oom.data.pie <- transform_sample_counts(oom.sp.st, 
                                           function(x) 100 * x/sum(x))
oom.data.pie <- psmelt(oom.data.pie)
oom.data.pie <- oom.data.pie[,c(1:3,6,55,56)] %>%
                mutate(sample = tolower(Sample))

oom.data.pie$sample <- gsub("n dakota","north dakota", oom.data.pie$sample)
oom.data.pie$sample <- gsub("s dakota","south dakota", oom.data.pie$sample)
oom.data.pie$Species <- factor(oom.data.pie$Species,
                               levels(oom.data.pie$Species)[c(7,3,6,8,1,2,5,4)])
#data
oom.pie.0 <- add_rownames(centroids, "sample") %>%
  left_join({distinct(oom.data.pie)}) %>%
  filter(!is.na(Clade))
## Joining by: "sample"
oom.pie <- oom.pie.0 %>% split(., .$sample)

#Color scale
pal2 <- colorRampPalette(brewer.pal(8, "Set3"))

#Pie chart
pies <- setNames(lapply(1:length(oom.pie), function(i){
  ggplot(oom.pie[[i]], aes(x=1, Abundance, fill=Species)) +
    geom_bar(stat="identity", width=1, color="black") + 
    coord_polar(theta="y") + 
    theme_tree() + 
    xlab(NULL) + 
    ylab(NULL) + 
    theme_transparent() +
    scale_fill_manual(values = pal2(8)) +
    theme(plot.margin=unit(c(0,0,0,0),"mm"))
}), names(oom.pie))

#Legend
e1 <- ggplot(oom.pie[[2]], aes(x=1, Abundance, fill=Species)) +
        geom_bar(stat="identity", width=1) + 
        coord_polar(theta="y") + 
  scale_fill_manual(values = pal2(8), 
                    labels = c("Py. sylvaticum","Py. heterothallicum", "Py. oopapillum",
                               "Py. ultimum var. ultimum","Py. aff. dissotocum",
                               "Py. aff. torulosum", "Py. lutarium","Py. irregulare")) +
  theme(legend.text = element_text(face = "italic"))

leg1 <- gtable_filter(ggplot_gtable(ggplot_build(e1)), "guide-box") 

#map
states2 <- subset(states, region %in% c(unique(oom.data.pie$sample), "missouri"))
map.p <- ggplot(states2, aes(long, lat, group=group)) +  
    geom_polygon(fill="gray90", color = "gray20", size=0.125) +
    xlim(-105,-78) +
    theme_transparent(axis.title = element_blank(),
                      axis.text = element_blank(),
                      axis.line = element_blank(),
                      axis.ticks = element_blank()) +
      annotation_custom(grob = leg1, xmin = -81, xmax = -83, ymin = 33, ymax = 40) 

#Final plot
n <- length(pies)
for (i in 1:n) {
    nms <- names(pies)[i]
    dat <- oom.pie.0[which(oom.pie.0$sample == nms)[1], ]
    map.p <- subview(map.p, pies[[i]], x = unlist(dat[["long"]])[1], y=unlist(dat[["lat"]])[1], 0.07, 0.07)
}

print(map.p)

Correlation of individual species with different parameters using occupancy models

library(ggradar)
library(scales)
library(MASS)
#Oom_phylo.sp <- transform_sample_counts(Oom_phylo, 
#                                     function(x) x/sum(x))
test <- psmelt(Oom_phylo)
head(test)
##         OTU  Sample Abundance   group State   State2 Year         St_Yr
## 8981 Otu074  ARSO_1       103  ARSO_1  ARSO Arkansas 2011 Arkansas 2011
## 9208 Otu076  INSO_1        95  INSO_1  INSO  Indiana 2011  Indiana 2011
## 5959 Otu049  ARSO_1        79  ARSO_1  ARSO Arkansas 2011 Arkansas 2011
## 7878 Otu065  ARSO_1        66  ARSO_1  ARSO Arkansas 2011 Arkansas 2011
## 1060 Otu009 ONSO2_1        52 ONSO2_1 ONSO2  Ontario 2012  Ontario 2012
## 7449 Otu062  IASO_7        45  IASO_7  IASO     Iowa 2011     Iowa 2011
##      CDL_2011 CDL_2012   Lat   Long Slope_Avg Aspect        SurfText   AWC
## 8981 Soybeans Soybeans 34.46 -91.42     0.057    167       Silt loam 0.180
## 9208     Corn Soybeans 40.30 -86.89     0.685    205       Silt loam 0.144
## 5959 Soybeans Soybeans 34.46 -91.42     0.057    167       Silt loam 0.180
## 7878 Soybeans Soybeans 34.46 -91.42     0.057    167       Silt loam 0.180
## 1060                      NA     NA        NA     NA                    NA
## 7449     Corn Soybeans 40.91 -93.76     0.605     69 Silty clay loam 0.173
##        CEC7   Clay Db3rdbar    EC   ECEC OrgMatter pHwater   Sand   Silt
## 8981 35.000 32.300    1.360 0.000  9.700     0.840   5.400 13.400 54.300
## 9208 17.598 24.428    1.596 0.000 14.037     1.213   6.421 27.994 47.579
## 5959 35.000 32.300    1.360 0.000  9.700     0.840   5.400 13.400 54.300
## 7878 35.000 32.300    1.360 0.000  9.700     0.840   5.400 13.400 54.300
## 1060     NA     NA       NA    NA     NA        NA      NA     NA     NA
## 7449 27.358 36.761    1.431 0.995  0.000     0.814   6.167  9.302 53.937
##      WC3rdbar Water_Source Shape_Length Shape_Area   Precip TMax_30yr_avg
## 8981   31.700     Rain-fed      384.217   8819.025 1375.578         22.51
## 9208   28.819     Rain-fed      385.419   8361.445 1215.619         16.45
## 5959   31.700     Rain-fed      384.217   8819.025 1375.578         22.51
## 7878   31.700     Rain-fed      384.217   8819.025 1375.578         22.51
## 1060       NA                        NA         NA       NA            NA
## 7449   32.988     Rain-fed      387.859   8912.774  946.014         15.81
##      TMean_30yr_avg TMin_30yr_avg Tmax_yr TMean_yr TMin_yr Precip_30yr_avg
## 8981          16.89         11.27  22.897   17.283  11.668         1257.97
## 9208          10.79          5.12  16.558   11.199   5.840          983.87
## 5959          16.89         11.27  22.897   17.283  11.668         1257.97
## 7878          16.89         11.27  22.897   17.283  11.668         1257.97
## 1060             NA            NA      NA       NA      NA              NA
## 7449          10.00          4.19  15.798    9.858   3.918          941.92
##      Tmin_AMJ_12 Tmin_AMJ_11 Tmean_AMJ_12 Tmean_AMJ_11 Tmax_AMJ_12
## 8981      17.210      17.097       22.838       22.832      28.467
## 9208      11.157      11.263       17.840       16.752      24.523
## 5959      17.210      17.097       22.838       22.832      28.467
## 7878      17.210      17.097       22.838       22.832      28.467
## 1060          NA          NA           NA           NA          NA
## 7449      10.833       9.597       17.537       15.442      24.240
##      Tmax_AMJ_11 Precip_AMJ_12 Precip_AMJ_11 Tmin_AMJ Tmean_AMJ Tmax_AMJ
## 8981      28.567        61.440       171.210   17.097    22.832   28.567
## 9208      22.240        54.863       156.853   11.263    16.752   22.240
## 5959      28.567        61.440       171.210   17.097    22.832   28.567
## 7878      28.567        61.440       171.210   17.097    22.832   28.567
## 1060          NA            NA            NA       NA        NA       NA
## 7449      21.287        88.110       155.497    9.597    15.442   21.287
##      Precip_AMJ           Phylum     Class     Order     Family
## 8981    171.210 Heterokontophyta Oomycetes Pythiales Pythiaceae
## 9208    156.853 Heterokontophyta Oomycetes Pythiales Pythiaceae
## 5959    171.210 Heterokontophyta Oomycetes Pythiales Pythiaceae
## 7878    171.210 Heterokontophyta Oomycetes Pythiales Pythiaceae
## 1060         NA Heterokontophyta Oomycetes Pythiales Pythiaceae
## 7449    155.497 Heterokontophyta Oomycetes Pythiales Pythiaceae
##             Genus                Clade              Species
## 8981      Pythium      Pythium_Clade_F     Pythium_spinosum
## 9208      Pythium      Pythium_Clade_F   Pythium_sylvaticum
## 5959      Pythium      Pythium_Clade_F   Pythium_irregulare
## 7878      Pythium      Pythium_Clade_F Pythium_paroecandrum
## 1060 Phytophthora Phytophthora_Clade_7   Phytophthora_sojae
## 7449      Pythium      Pythium_Clade_B   Pythium_oopapillum
test %>% filter(Species == "Pythium_sylvaticum"| 
                Species == "Pythium_heterothallicum"|
                Species == "Pythium_oopapillum"|
                Species == "Pythium_ultimum_var._ultimum"|
                Species == "Pythium_aff._dissotocum"|
                Species == "Pythium_aff._torulosum") %>% 
  group_by(Species) %>%
  filter(!is.na(Clay)) %>%
  filter(CEC7 < 100) -> test_dat


pp1 <- ggplot(test_dat, aes(x=pHwater, y=log10(Abundance + 1))) + 
  geom_point(aes(colour=Lat), size = 3, alpha=0.9) + 
  geom_smooth(se = FALSE, method = "glm.nb", colour="grey30") +
  facet_wrap(~ Species) + labs(x="soil pH") +   
  scale_color_continuous(name="Latitude", na.value = "grey50", 
                         low = "#ef8a62", high = "#67a9cf") +
  theme_bw()

pp2 <-ggplot(test_dat, aes(x=Clay, y=log10(Abundance + 1))) + 
  geom_point(aes(colour=Lat), size = 3, alpha=0.9) + 
  geom_smooth(se = FALSE, method = "glm.nb", colour="grey30") +
  facet_wrap(~ Species) + labs(x="Clay percent (%)") +   
  scale_color_continuous(name="Latitude", na.value = "grey50", 
                         low = "#ef8a62", high = "#67a9cf") +
  theme_bw()

pp3 <-ggplot(test_dat, aes(x=Precip_AMJ, y=log10(Abundance + 1))) + 
  geom_point(aes(colour=Lat), size = 3, alpha=0.8) + 
  geom_smooth(se = FALSE, method = "glm.nb", colour="grey30") +
  facet_wrap(~ Species) + labs(x="Seasonal precipitation (mm)") +   
  scale_color_continuous(name="Latitude", na.value = "grey50", 
                         low = "#ef8a62", high = "#67a9cf") +
  theme_bw()

pp4 <- ggplot(test_dat, aes(x=Tmin_AMJ, y=log10(Abundance + 1))) + 
  geom_point(aes(colour=Lat), size = 3, alpha=0.9) + 
  geom_smooth(se = FALSE, method = "glm.nb", colour="grey30") +
  facet_wrap(~ Species) + labs(x="Season minimum temperature (ºC)") +   
  scale_color_continuous(name="Latitude", na.value = "grey50", 
                         low = "#ef8a62", high = "#67a9cf") +
  expand_limits(x=c(4,18)) +
  theme_bw()


pp5 <- ggplot(test_dat[test_dat$CEC < 100,], aes(x=CEC7, y=log10(Abundance + 1))) + 
  geom_point(aes(colour=Lat), size = 3, alpha=0.9) + 
  geom_smooth(se = FALSE, method = "glm.nb", colour="grey30") +
  facet_wrap(~ Species) + labs(x="Cation exchange capacity (meq/100g)") +   
  scale_color_continuous(name="Latitude", na.value = "grey50", 
                         low = "#ef8a62", high = "#67a9cf") +
  theme_bw()

plot_grid(pp1, pp2, labels = c("A", "B"), 
          ncol = 1, nrow = 2, align = "h")

plot_grid(pp3, pp4, labels = c("C", "D"), 
          ncol = 1, nrow = 2, align = "h")

plot_grid(pp5, labels ="E", 
          ncol = 1, nrow = 2, align = "h")

Ordination by taxa

Oom_biom2 <- prune_samples(!is.na(Oom_biom@sam_data$Lat), Oom_biom)

Oom_biom_ord <- ordinate(Oom_biom2, "PCoA", "bray")
ord_plot2 <- plot_ordination(Oom_biom2, Oom_biom_ord, type = "taxa", color = "Clade")
ord_plot.f2 <- ord_plot2 + geom_point(size = 4, alpha = 0.7) +
  theme_light() + labs(color = "Clade")

## Environment fit analysis
bray.pcoa2 <- ordinate(Oom_biom2, method = "PCoA", "bray")
#env3 <- vegansam(Oom_biom2) %>% select("CDL_2011":"Precip_AMJ")

Oom_env2 <- envfit(bray.pcoa$vectors, env, permutations = 999)
## Warning in as.matrix.data.frame(P): Setting class(x) to NULL; result will
## no longer be an S4 object
fit_data2 <- as.data.frame(scores(Oom_env2, display = "vectors")) %>%
  add_rownames(var = "Env.var") %>%
  bind_cols(data.frame(Oom_env2$vectors$r, Oom_env2$vectors$pvals)) %>%
  rename(R2 = Oom_env2.vectors.r, P.value = Oom_env2.vectors.pvals) %>%
  arrange(P.value)
  

## Vectors for plot
fit_reduced2 <- fit_data2[fit_data2$P.value < 0.05,] 

fit_plot2 <- as.data.frame(scores(Oom_env2, display = "vectors")) %>%
  add_rownames(var = "Env.var") %>%
  inner_join(fit_reduced, by = "Env.var") 


ord_plot.data2 <- plot_ordination(Oom_biom2, Oom_biom_ord, 
                                  type = "taxa", color = "Clade", justDF = TRUE)

(ord.plot.env2 <- ggplot(data = ord_plot.data2, aes(x = Axis.1, y = Axis.2)) + 
  labs(color = "Clade", x = "PCoA 1 [12.1%]", y = "PCoA 2 [9.4%]") +
  geom_segment(data = fit_plot2, aes(x = 0, xend = Axis.1.x, y = 0, yend = Axis.2.x), 
               arrow = arrow(length = unit(0.1,"cm")), color = "black", size = 0.8) + 
  geom_label_repel(data = fit_plot2, aes(x = Axis.1.x, y = Axis.2.x, label = Env.var), 
            size = 2, force = 1, label.size = 0.15) + 
  geom_point(aes(color=Clade), size = 4, alpha = 0.7) + 
  facet_wrap(~Clade) +
  theme_gray())
## Warning: Removed 1 rows containing missing values (geom_point).